********************************************************************************
*Clean raw HCAHPS data
********************************************************************************
* Root folder holding the raw HCAHPS CSV files; every relative path below is
* resolved against it. Forward slashes are used because Stata accepts them on
* Windows, macOS and Linux, whereas backslashes only work on Windows.
global data "/replication/public_data/hcahps_data"

cd "$data"
* -capture- suppresses the error raised when the folder already exists.
capture mkdir output
	
********************************************************************************
*0. Clean 2011-12 format
*   Reads "<year>HCAHPS Measures.csv" for the 2012 and 2013 releases, keeps the
*   provider id plus the five survey measures, converts the measures to numeric
*   and saves one tempfile per release (`hc2012', `hc2013').
forvalues i = 2012/2013 {
	*Note: Hospital Compare data is labeled with following year. 
	*Variables: overall rating, would recommend, communicated well, quick care
	
	insheet using "`i'HCAHPS Measures.csv", comma clear names
	ren prov ccn
	ren recc recommend
	* Harmonise the id type with the 2013-18 block so the yearly files can be
	* appended; a no-op (error captured) when ccn is already numeric.
	capture destring ccn, replace ignore("F")
	keep ccn communic_* quick rating recommend 
	foreach var in communic_nurse communic_dr quick rating recommend {
		* insheet imports a column as numeric when it holds no text at all;
		* blanking strings on a numeric variable would stop the script with a
		* type mismatch, so only clean columns that were imported as strings.
		capture confirm string variable `var'
		if _rc == 0 {
			replace `var' = "" if inlist(`var', "Not Available", "Not Applicable")
			destring `var', replace
		}
	}
	gduplicates drop
	tempfile hc`i'
	save `hc`i'', replace 
}

********************************************************************************
*1. Clean 2013-18 format
*   Reads "<year>HCAHPS - Hospital.csv" for the 2014-2019 releases. These files
*   are long (one row per provider x measure), so the five measures of interest
*   are spread into one column each, collapsed to one row per provider and
*   saved as a tempfile per release (`hc2014' ... `hc2019').
forvalues i = 2014/2019 {
	insheet using "`i'HCAHPS - Hospital.csv", comma clear names
	* The provider id column is named differently across releases; whichever
	* rename does not apply fails silently.
	capture ren prov ccn
	capture ren facilityid ccn
	* Strip the letter "F" from ids and convert to numeric; captured so that an
	* already-numeric (or non-convertible) ccn does not stop the script.
	capture destring ccn, replace ignore("F")
	
	keep ccn hcahpsmeasureid hcahpsquestion hcahpsanswerdescription hcahpsanswerpercent
	ren hcahpsanswerpercent pct_ans
	ren hcahpsmeasureid id 
	* insheet imports the percent column as numeric when it holds no text;
	* only blank out the placeholders when it arrived as a string.
	capture confirm string variable pct_ans
	if _rc == 0 {
		replace pct_ans = "" if inlist(pct_ans, "Not Available", "Not Applicable")
		destring pct_ans, replace
	}
	
	keep if inlist(id,"H_COMP_1_A_P","H_COMP_2_A_P","H_COMP_3_A_P","H_HSP_RATING_9_10","H_RECMND_DY")
	
	* Measure ids and the output variable each one feeds (paired by position).
	local ids   H_RECMND_DY H_HSP_RATING_9_10 H_COMP_2_A_P H_COMP_1_A_P H_COMP_3_A_P
	local names recommend   rating            communic_dr  communic_nurse quick
	forvalues k = 1/5 {
		local mid : word `k' of `ids'
		local var : word `k' of `names'
		* Value is present only on the row of the matching measure ...
		gen `var' = pct_ans if id=="`mid'"
		* ... so the within-provider mean copies it onto every row of that ccn.
		bysort ccn: gegen `var'2 = mean(`var')
		drop `var' 
		rename `var'2 `var'
		* A value of exactly 0 is recoded to missing.
		replace `var' = . if `var'==0
		sum `var', d
	}
	keep ccn recommend rating communic_* quick
	* All rows of a provider are now identical, so this leaves one row per ccn.
	gduplicates drop 
	gduplicates report ccn 
	tempfile hc`i'
	save `hc`i'', replace 
}

********************************************************************************
*2. Append years
*   Stacks the yearly tempfiles built above into one provider-year panel and
*   writes it to output/quality_11_18.dta.
	{
	use `hc2012', clear
	gen year = 2012
	forvalues i = 2013/2019 {
		append using `hc`i''
		* Rows just appended have no year yet; tag them with the release year.
		* The variable name is written in full so the script does not depend on
		* variable abbreviation being enabled.
		replace year = `i' if missing(year)
	}
	
	*Note: Hospital Compare data is labeled with following year. 
	replace year = year - 1
	tab year
	sort ccn year
	qui compress
	}
	* Forward slash keeps the path valid on Windows, macOS and Linux.
	save "output/quality_11_18.dta", replace